{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pandas数据统计函数"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. 汇总类统计\n",
    "2. 唯一去重和按值计数\n",
    "3. 相关系数和协方差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 0、读取csv数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "fpath = \"./datas/beijing_tianqi/beijing_tianqi_2018.csv\"\n",
    "df = pd.read_csv(fpath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ymd</th>\n",
       "      <th>bWendu</th>\n",
       "      <th>yWendu</th>\n",
       "      <th>tianqi</th>\n",
       "      <th>fengxiang</th>\n",
       "      <th>fengli</th>\n",
       "      <th>aqi</th>\n",
       "      <th>aqiInfo</th>\n",
       "      <th>aqiLevel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>2018-01-01</td>\n",
       "      <td>3℃</td>\n",
       "      <td>-6℃</td>\n",
       "      <td>晴~多云</td>\n",
       "      <td>东北风</td>\n",
       "      <td>1-2级</td>\n",
       "      <td>59</td>\n",
       "      <td>良</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>2018-01-02</td>\n",
       "      <td>2℃</td>\n",
       "      <td>-5℃</td>\n",
       "      <td>阴~多云</td>\n",
       "      <td>东北风</td>\n",
       "      <td>1-2级</td>\n",
       "      <td>49</td>\n",
       "      <td>优</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2018-01-03</td>\n",
       "      <td>2℃</td>\n",
       "      <td>-5℃</td>\n",
       "      <td>多云</td>\n",
       "      <td>北风</td>\n",
       "      <td>1-2级</td>\n",
       "      <td>28</td>\n",
       "      <td>优</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          ymd bWendu yWendu tianqi fengxiang fengli  aqi aqiInfo  aqiLevel\n",
       "0  2018-01-01     3℃    -6℃   晴~多云       东北风   1-2级   59       良         2\n",
       "1  2018-01-02     2℃    -5℃   阴~多云       东北风   1-2级   49       优         1\n",
       "2  2018-01-03     2℃    -5℃     多云        北风   1-2级   28       优         1"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 替换掉温度的后缀℃\n",
    "df.loc[:, \"bWendu\"] = df[\"bWendu\"].str.replace(\"℃\", \"\").astype('int32')\n",
    "df.loc[:, \"yWendu\"] = df[\"yWendu\"].str.replace(\"℃\", \"\").astype('int32')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ymd</th>\n",
       "      <th>bWendu</th>\n",
       "      <th>yWendu</th>\n",
       "      <th>tianqi</th>\n",
       "      <th>fengxiang</th>\n",
       "      <th>fengli</th>\n",
       "      <th>aqi</th>\n",
       "      <th>aqiInfo</th>\n",
       "      <th>aqiLevel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>2018-01-01</td>\n",
       "      <td>3</td>\n",
       "      <td>-6</td>\n",
       "      <td>晴~多云</td>\n",
       "      <td>东北风</td>\n",
       "      <td>1-2级</td>\n",
       "      <td>59</td>\n",
       "      <td>良</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>2018-01-02</td>\n",
       "      <td>2</td>\n",
       "      <td>-5</td>\n",
       "      <td>阴~多云</td>\n",
       "      <td>东北风</td>\n",
       "      <td>1-2级</td>\n",
       "      <td>49</td>\n",
       "      <td>优</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2018-01-03</td>\n",
       "      <td>2</td>\n",
       "      <td>-5</td>\n",
       "      <td>多云</td>\n",
       "      <td>北风</td>\n",
       "      <td>1-2级</td>\n",
       "      <td>28</td>\n",
       "      <td>优</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          ymd  bWendu  yWendu tianqi fengxiang fengli  aqi aqiInfo  aqiLevel\n",
       "0  2018-01-01       3      -6   晴~多云       东北风   1-2级   59       良         2\n",
       "1  2018-01-02       2      -5   阴~多云       东北风   1-2级   49       优         1\n",
       "2  2018-01-03       2      -5     多云        北风   1-2级   28       优         1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1、汇总类统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bWendu</th>\n",
       "      <th>yWendu</th>\n",
       "      <th>aqi</th>\n",
       "      <th>aqiLevel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>count</td>\n",
       "      <td>365.000000</td>\n",
       "      <td>365.000000</td>\n",
       "      <td>365.000000</td>\n",
       "      <td>365.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>mean</td>\n",
       "      <td>18.665753</td>\n",
       "      <td>8.358904</td>\n",
       "      <td>82.183562</td>\n",
       "      <td>2.090411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>std</td>\n",
       "      <td>11.858046</td>\n",
       "      <td>11.755053</td>\n",
       "      <td>51.936159</td>\n",
       "      <td>1.029798</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>min</td>\n",
       "      <td>-5.000000</td>\n",
       "      <td>-12.000000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>25%</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>-3.000000</td>\n",
       "      <td>46.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>50%</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>69.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>75%</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>104.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>max</td>\n",
       "      <td>38.000000</td>\n",
       "      <td>27.000000</td>\n",
       "      <td>387.000000</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           bWendu      yWendu         aqi    aqiLevel\n",
       "count  365.000000  365.000000  365.000000  365.000000\n",
       "mean    18.665753    8.358904   82.183562    2.090411\n",
       "std     11.858046   11.755053   51.936159    1.029798\n",
       "min     -5.000000  -12.000000   21.000000    1.000000\n",
       "25%      8.000000   -3.000000   46.000000    1.000000\n",
       "50%     21.000000    8.000000   69.000000    2.000000\n",
       "75%     29.000000   19.000000  104.000000    3.000000\n",
       "max     38.000000   27.000000  387.000000    6.000000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 一下子提取所有数字列统计结果\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18.665753424657535"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 查看单个Series的数据\n",
    "df[\"bWendu\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "38"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 最高温\n",
    "df[\"bWendu\"].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-5"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 最低温\n",
    "df[\"bWendu\"].min()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2、唯一去重和按值计数"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 2.1 唯一性去重\n",
    "一般不用于数值列，而是枚举、分类列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['东北风', '北风', '西北风', '西南风', '南风', '东南风', '东风', '西风'], dtype=object)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"fengxiang\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['晴~多云', '阴~多云', '多云', '阴', '多云~晴', '多云~阴', '晴', '阴~小雪', '小雪~多云',\n",
       "       '小雨~阴', '小雨~雨夹雪', '多云~小雨', '小雨~多云', '大雨~小雨', '小雨', '阴~小雨',\n",
       "       '多云~雷阵雨', '雷阵雨~多云', '阴~雷阵雨', '雷阵雨', '雷阵雨~大雨', '中雨~雷阵雨', '小雨~大雨',\n",
       "       '暴雨~雷阵雨', '雷阵雨~中雨', '小雨~雷阵雨', '雷阵雨~阴', '中雨~小雨', '小雨~中雨', '雾~多云',\n",
       "       '霾'], dtype=object)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"tianqi\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['1-2级', '4-5级', '3-4级', '2级', '1级', '3级'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"fengli\"].unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 2.2 按值计数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "南风     92\n",
       "西南风    64\n",
       "北风     54\n",
       "西北风    51\n",
       "东南风    46\n",
       "东北风    38\n",
       "东风     14\n",
       "西风      6\n",
       "Name: fengxiang, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"fengxiang\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "晴         101\n",
       "多云         95\n",
       "多云~晴       40\n",
       "晴~多云       34\n",
       "多云~雷阵雨     14\n",
       "多云~阴       10\n",
       "阴~多云        8\n",
       "小雨~多云       8\n",
       "雷阵雨         8\n",
       "雷阵雨~多云      7\n",
       "小雨          6\n",
       "多云~小雨       5\n",
       "阴           4\n",
       "雷阵雨~中雨      4\n",
       "中雨~小雨       2\n",
       "中雨~雷阵雨      2\n",
       "阴~小雨        2\n",
       "霾           2\n",
       "阴~小雪        1\n",
       "小雪~多云       1\n",
       "大雨~小雨       1\n",
       "小雨~雷阵雨      1\n",
       "小雨~中雨       1\n",
       "小雨~雨夹雪      1\n",
       "雾~多云        1\n",
       "雷阵雨~阴       1\n",
       "暴雨~雷阵雨      1\n",
       "小雨~阴        1\n",
       "雷阵雨~大雨      1\n",
       "阴~雷阵雨       1\n",
       "小雨~大雨       1\n",
       "Name: tianqi, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"tianqi\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1-2级    236\n",
       "3-4级     68\n",
       "1级       21\n",
       "4-5级     20\n",
       "2级       13\n",
       "3级        7\n",
       "Name: fengli, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"fengli\"].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3、相关系数和协方差\n",
    "\n",
    "用途（超级厉害）：\n",
    "1. 两只股票，是不是同涨同跌？程度多大？正相关还是负相关？\n",
    "2. 产品销量的波动，跟哪些因素正相关、负相关，程度有多大？\n",
    "\n",
    "来自知乎，对于两个变量X、Y：\n",
    "1. 协方差：***衡量同向反向程度***，如果协方差为正，说明X，Y同向变化，协方差越大说明同向程度越高；如果协方差为负，说明X，Y反向运动，协方差越小说明反向程度越高。\n",
    "2. 相关系数：***衡量相似度程度***，当他们的相关系数为1时，说明两个变量变化时的正向相似度最大，当相关系数为－1时，说明两个变量变化的反向相似度最大"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bWendu</th>\n",
       "      <th>yWendu</th>\n",
       "      <th>aqi</th>\n",
       "      <th>aqiLevel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>bWendu</td>\n",
       "      <td>140.613247</td>\n",
       "      <td>135.529633</td>\n",
       "      <td>47.462622</td>\n",
       "      <td>0.879204</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>yWendu</td>\n",
       "      <td>135.529633</td>\n",
       "      <td>138.181274</td>\n",
       "      <td>16.186685</td>\n",
       "      <td>0.264165</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>aqi</td>\n",
       "      <td>47.462622</td>\n",
       "      <td>16.186685</td>\n",
       "      <td>2697.364564</td>\n",
       "      <td>50.749842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>aqiLevel</td>\n",
       "      <td>0.879204</td>\n",
       "      <td>0.264165</td>\n",
       "      <td>50.749842</td>\n",
       "      <td>1.060485</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              bWendu      yWendu          aqi   aqiLevel\n",
       "bWendu    140.613247  135.529633    47.462622   0.879204\n",
       "yWendu    135.529633  138.181274    16.186685   0.264165\n",
       "aqi        47.462622   16.186685  2697.364564  50.749842\n",
       "aqiLevel    0.879204    0.264165    50.749842   1.060485"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 协方差矩阵：\n",
    "df.cov()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bWendu</th>\n",
       "      <th>yWendu</th>\n",
       "      <th>aqi</th>\n",
       "      <th>aqiLevel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>bWendu</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.972292</td>\n",
       "      <td>0.077067</td>\n",
       "      <td>0.071999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>yWendu</td>\n",
       "      <td>0.972292</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.026513</td>\n",
       "      <td>0.021822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>aqi</td>\n",
       "      <td>0.077067</td>\n",
       "      <td>0.026513</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.948883</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>aqiLevel</td>\n",
       "      <td>0.071999</td>\n",
       "      <td>0.021822</td>\n",
       "      <td>0.948883</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            bWendu    yWendu       aqi  aqiLevel\n",
       "bWendu    1.000000  0.972292  0.077067  0.071999\n",
       "yWendu    0.972292  1.000000  0.026513  0.021822\n",
       "aqi       0.077067  0.026513  1.000000  0.948883\n",
       "aqiLevel  0.071999  0.021822  0.948883  1.000000"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 相关系数矩阵\n",
    "df.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.07706705916811077"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 单独查看空气质量和最高温度的相关系数\n",
    "df[\"aqi\"].corr(df[\"bWendu\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.02651328267296879"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"aqi\"].corr(df[\"yWendu\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.21652257576382047"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 空气质量和温差的相关系数\n",
    "df[\"aqi\"].corr(df[\"bWendu\"]-df[\"yWendu\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !! 这就是特征工程对于机器学习重要性的一个例子"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.5"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "0.21/0.02"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
